library(dplyr)
library(lubridate)
library(zoo)
library(ggplot2)
library(limma)
library(ggpubr)
library(grid)
library(plotly)
# Data files and prep work.
# Load the project's shared helper scripts. The paths are relative, so they are
# resolved against the current working directory when this script runs.
source("../../lib/DataProccess.R")
source("../../lib/NormFuncs.R")
source("../../lib/OutlierDetectionFuncs.R")
source("../../lib/DataPathName.R")
# `params` is not defined in this section; presumably it is the parameter list
# of the enclosing R Markdown report -- TODO confirm.
BaseDir <- params$BaseDir#get the root of the directory where the data is stored
“Files Used:”
COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv
# Rank every sample by how sharply N1 and N2 rise relative to the rows
# immediately before and after it.
#
# Rows are ordered by Date only and the per-site grouping is disabled, so
# "previous" and "next" mean adjacent rows of the whole table, whatever their
# Site. Disabled steps kept for reference:
#   filter(Date < mdy("10/31/2021"))
#   group_by(Site)
RankingDF <- LIMSFullDF %>%
  arrange(Date) %>%
  # Missing measurements are treated as 0 before differencing.
  mutate(
    N1 = ifelse(is.na(N1), 0, N1),
    N2 = ifelse(is.na(N2), 0, N2)
  ) %>%
  # rank(desc(.)) gives rank 1 to the largest rise over the neighbouring row.
  # lag()/lead() yield NA at the table ends and rank() keeps those NAs.
  mutate(
    N1RankLeft = rank(desc(N1 - lag(N1))),
    N1RankRight = rank(desc(N1 - lead(N1))),
    N2RankLeft = rank(desc(N2 - lag(N2))),
    N2RankRight = rank(desc(N2 - lead(N2)))
  ) %>%
  select(
    Date, Site,
    N1RankLeft, N1RankRight, N2RankLeft, N2RankRight,
    N1, N2
  ) %>%
  # Negated worst (numerically largest) of the four ranks: values close to 0
  # mean the point stands out on both sides for both N1 and N2. pmax() is
  # called without na.rm, so the first and last rows end up NA.
  mutate(
    MaxN1Rank = -pmax(N1RankLeft, N1RankRight, N2RankLeft, N2RankRight)
  )
# Two cut-offs handed to PlotlyView() as its Threshold argument
# (presumably quantile levels, given the QuintileBound argument -- TODO confirm).
BreakUsed <- c(.90,.95)
# PlotlyView() and QuintileBound are not defined in this section; presumably
# they come from the sourced helper scripts -- TODO confirm.
# The result is indexed as a list: elements 1 and 2 are displayed here, and
# element 3 is used further down as a table with FlagScheme and MaxN1Rank columns.
Vec <- PlotlyView(RankingDF, "MaxN1Rank", QuintileBound, Threshold = BreakUsed)
# Top-level expressions: evaluating them displays the first two elements.
Vec[[1]]
Vec[[2]]
# Show the samples whose worst jump rank is 10 or better, by date then site.
# MaxN1Rank stores the negated rank, hence the comparison against -10.
RankingDF %>%
  filter(MaxN1Rank >= -10) %>%
  arrange(Date, Site)
#Instead Max 10, Max 20
#Rolling SD Vs Rolling mean
#1) Show everything for clear outliers
#2) Show soft with 25
# Display the third element of the PlotlyView() result as a table:
# FlagScheme is relabelled ErrorLevel, and the negated rank is turned back
# into a positive Rank so that smaller means more extreme.
Vec[[3]] %>%
  arrange(FlagScheme) %>%
  rename(ErrorLevel = FlagScheme, Rank = MaxN1Rank) %>%
  mutate(Rank = -Rank) %>%
  arrange(ErrorLevel, Rank)
# Disabled export step, kept for reference:
#   %>% write.csv("RmdOutput/PossibleOutliers.csv")
# Scatter plot of N1 and N2 over time for all sites in one panel, coloured by
# Site. Points whose worst jump rank is 12 or better (ThresholdOutlier == TRUE)
# are set apart through alpha and size.
# `info` is not a standard ggplot2 aesthetic; it carries the positive rank on
# each layer and is named again in Display below (presumably so it shows up in
# the interactive tooltip -- TODO confirm against GGPlotlyWithScaling).
ThresholdFlagDF <- RankingDF %>%
  mutate(ThresholdOutlier = MaxN1Rank >= -12) %>%
  arrange(Date, Site) %>%
  ggplot(aes(x = Date)) +
  theme(panel.spacing.y = unit(2, "lines")) +
  geom_point(
    aes(
      y = N1, color = Site, shape = "N1", info = -MaxN1Rank,
      alpha = ThresholdOutlier, size = ThresholdOutlier
    )
  ) +
  geom_point(
    aes(
      y = N2, color = Site, shape = "N2", info = -MaxN1Rank,
      alpha = ThresholdOutlier, size = ThresholdOutlier
    )
  )

# GGPlotlyWithScaling() is defined outside this section; the meaning of its
# second argument (2) cannot be determined from here.
GGPlotlyWithScaling(
  ThresholdFlagDF,
  2,
  Display = c("N1", "N2", "Date", "info")
)
# Per-site version of the flag plot: one facet per Site stacked in a single
# column, with points coloured by a three-level flag instead of by Site.
#   rank <= 12        -> "CFlaggedRestrictive"
#   12 < rank <= 47   -> "BFlagged"
#   rank > 47         -> "ANotFlagged"
# Rows with a missing rank keep a missing flag. The A/B/C prefixes presumably
# fix the alphabetical ordering of the levels in the legends -- TODO confirm.
ThresholdFlagDF <- RankingDF %>%
  mutate(
    # Flip the sign back so MaxN1Rank is the positive rank from here on.
    MaxN1Rank = -MaxN1Rank,
    ThresholdOutlier = ifelse(
      MaxN1Rank <= 12,
      "CFlaggedRestrictive",
      ifelse(MaxN1Rank <= 47, "BFlagged", "ANotFlagged")
    )
  ) %>%
  arrange(Date, Site) %>%
  ggplot(aes(x = Date)) +
  theme(panel.spacing.y = unit(1, "lines")) +
  geom_point(
    aes(
      y = N1, color = ThresholdOutlier, shape = "N1",
      alpha = ThresholdOutlier, size = ThresholdOutlier, info = MaxN1Rank
    )
  ) +
  geom_point(
    aes(
      y = N2, color = ThresholdOutlier, shape = "N2",
      alpha = ThresholdOutlier, size = ThresholdOutlier, info = MaxN1Rank
    )
  ) +
  facet_wrap(~Site, ncol = 1, scales = "free")

# GGPlotlyWithScaling() is defined outside this section; the meaning of its
# second argument (3) cannot be determined from here.
GGPlotlyWithScaling(
  ThresholdFlagDF,
  3,
  Display = c("N1", "N2", "Date", "info")
)